In [1]:
# libraries and configs
import pandas as pd
import numpy as np
from pandas.tseries.offsets import *
from scipy.stats import norm
import plotly.offline as po
import plotly.graph_objs as go
import plotly.figure_factory as ff
po.init_notebook_mode(connected = True)
import matplotlib.pyplot as plt
%matplotlib inline
import qgrid
qgrid.nbinstall(overwrite=True)
from pivottablejs import pivot_ui
import colorlover as cl
greys = cl.scales['9']['seq']['Greys']
greys = cl.interp(greys, 10)
pd.set_option('display.max_rows', 24)

# master data
master = pd.read_csv('data/master.csv', index_col = 0)
master.index = pd.to_datetime(master.index)
# overwrite the "dip" on 2016-08-10 13:00
master.loc['20160810 13:00', 'mw'] = np.mean([master.loc['20160810 12:00', 'mw'],master.loc['20160810 14:00', 'mw']])
master['base_period'] = [el.year if el.month > 4 else el.year - 1 for el in master.index]
master.average = master.average.fillna(master.forecast)
master.peak = master.peak.fillna(master.forecast)

def get_tops(slc, i, col):
    d = slc.loc[slc.groupby(pd.TimeGrouper('D'))[col].idxmax()]
    s = d.groupby(slc.base_period)[col].apply(lambda x: x.nlargest(i))
    return d.loc[s.index.levels[1]]
def get_mins(slc, i, col):
    d = slc.loc[slc.groupby(pd.TimeGrouper('D'))[col].idxmin()]
    s = d.groupby(slc.base_period)[col].apply(lambda x: x.nsmallest(i))
    return d.loc[s.index.levels[1]]
def get_colors(i, opacity):
    colors = ['rgba(192, 57, 43,' + str(opacity) + ')',
              'rgba(234,236,229,' + str(opacity) + ')', 
              'rgba(192,222,217,' + str(opacity) + ')',
              'rgba(143,191,183,' + str(opacity) + ')',
              'rgba( 92,154,144,' + str(opacity) + ')',
              'rgba( 59, 58, 48,' + str(opacity) + ')',
             ]
    return colors[i]

Hi Hi!

This is a static page (as in it doesnt not intepret nor run my code) thus some charts are with limited level of interactivity - these data points/calculations were "embedded" in the HTML (or the "webpage"). I try to provide as much interactivity as I can however some operations, such as to filter/change time period or to view charts for individual years, can only be done in the code. - It can be done very easily in the code. To do that, you will need to setup the Python environment on your local PC and gain some knowledge of Python - one of the most powerful, widely used multi-purpose programing language.

If you have any issues with the content or have any questions/thoughts, feel free to email me!

Ontario Demand

An exploratory overview/analysis of historical Ontario Demand and Coincident Peaks in IESO market

Data Used

For an overview of the Industrial Conservation Initiative (ICI) program, click here to download the ICI Backgrounder.

The following data were collected and used for this analysis:

  • Historical Ontario Demand (full description, csv download): hourly demand data from 2002-05-01 (market opening) to date. However, those are not the most accurate figures. The precise hours and amount used for ICI settlement is based on the adjusted AQEW (Allocated Quantity of Energy Withdrawn) values. AQEW are availble 20 business days after the trade date and adjusted to omit the net energy withdrawn by Beck PGS, Fort Frances, and for ancillary services.
    - I have not been able to find a public data directy for AQEW.
  • Forecasted Ontario Demand (full description, data directory): hourly forecast data from 2017-04-08 to date. IESO publishes the forecasted Ontario demand looking out over the next 34 days. These forecast are updated twice per day for days 2 to 34, 13 times for the next day and twice per hour for current day.

In addition:

  • Historical Climate Data: hourly weather data from Environment and Natural Resources Canada for 124 weather stations across Ontario for months of Jan, Feb, Jun - Sep, 2012 - 2017. Weather data from selected stations were used based on proximity (nearest five) to population centres in Ontario. Population distribution is also used to determine the weight of selected station data.

Historical Demand

i. Peak Distribution

In [2]:
yrs, yre = 2002, 2016
ntop = 5

mask = pd.DataFrame(0, columns = np.arange(24), index = np.arange(1, 13))
dummy = pd.date_range('1990-01-01 0:00', periods = 25, freq = 'H')
x_labels = dummy.strftime('%I%p')
x_labels = [x[0].replace('0','') + x[1:] for x in x_labels]
dummy = pd.date_range('1990-01-01', periods = 12, freq = 'm')
y_labels = dummy.strftime('%b')
data = []
if yrs == yre:
    title_str = str(yrs)
else:
    title_str = str(yrs) + ' - ' + str(yre)

for ntop in range(5, 55, 5):
    tops = get_tops(master, ntop, 'mw')[['mw', 'base_period']]
    slc = tops.loc[(tops.base_period > yrs - 1) & (tops.base_period < yre + 1), 'mw']
    counts = slc.groupby((slc.index.month, slc.index.hour)).count().unstack(level = 1) + mask
    counts.fillna(0, inplace = True)
    
    trace1 = go.Heatmap(
        name = 'Count',
        z = counts.values,
        x = np.arange(24),
        y = y_labels,
        xaxis = 'x',
        yaxis = 'y',
        colorscale = 'Greys',
        reversescale=True,
        opacity = 0.6,
        showscale = False,
        visible=False,
    )

    trace2 = go.Bar(
        name = 'Frequency',
        x = np.arange(24),
        y = counts.sum(axis = 0)/counts.sum(axis = 0).sum(),
        marker = dict(color = 'grey'),
        opacity = 0.75,
        xaxis = 'x3',
        yaxis = 'y2',
        visible=False,
        #y = y_labels,
    )

    trace3 = go.Bar(
        name = 'Frequency',
        y = np.arange(13),    
        x = counts.sum(axis = 1)/counts.sum(axis = 1).sum(),
        orientation = 'h',
        marker = dict(color = 'grey'),
        opacity = 0.75,
        xaxis = 'x2',
        yaxis = 'y3',
        visible=False,
#        x = x_labels,      
    )
    
    data.extend([trace1, trace2, trace3])

data[0]['visible'] = True
data[1]['visible'] = True
data[2]['visible'] = True

steps = []
labels = ['Top ' + str(i) for i in range(5, 55, 5)]
for i in range(int(len(data)/ 3)):
    step = dict(
        method = 'restyle',
        args = ['visible', [False] * len(data)],
        label = labels[i]
    )
    step['args'][1][i*3] = True
    step['args'][1][i*3 + 1] = True
    step['args'][1][i*3 + 2] = True
    steps.append(step)

sliders = [dict(
    active = 0,
    currentvalue = {"prefix": "# of Peaks: "},
    pad = {"t": 60},
    steps = steps,
)]

layout = go.Layout(
    autosize=False,
    width=900,
    height=600,
    title = 'Peak Distribution by Month and Hour<br>(' + title_str + ')',
    sliders = sliders,
    hovermode = 'closest',
    xaxis = dict(
        linecolor = 'grey',
        mirror = True,
        domain = [0, 0.86],
        tick0 = -0.5,
        dtick = 1,
        zeroline = False,
        showgrid = True,
        showticklabels = False,
    ),
    yaxis= dict(
        linecolor = 'grey',
        mirror = True,
        domain = [0, 0.75],
        autorange = 'reversed', 
        zeroline = False,
        showgrid = False,
        showticklabels = True,
        tickcolor = 'white'
    ),
    xaxis2 = dict(
        range = [0, 0.45],
        linecolor = 'grey',
        mirror = True,
        domain = [0.88, 1],
        zeroline = True,
        showgrid = True,
        tickformat="%",
        dtick = 0.15
    ),
    xaxis3 = dict(
        domain = [0, 0.86],
        anchor = 'y2',
        linecolor = 'grey',
        mirror = True,
        showticklabels = False,
        showgrid = True,
        tick0 = -0.5,
        dtick = 1,
    ),
    yaxis2 = dict(
        range = [0, 0.45],
        linecolor = 'grey',
        mirror = True,
        domain = [0.78, 1],
        zeroline = True,
        showgrid = True,
        tickformat="%",
        dtick = 0.15
    ),
    yaxis3 = dict(
        autorange='reversed',
        domain = [0, 0.75],
        anchor = 'x2',
        linecolor = 'grey',
        mirror = True,
        showticklabels = False,
        showgrid = True,
        tick0 = -0.5,
        dtick = 1,
    ),
    xaxis4 = dict(
        range = [-0.5, 23.5],
        domain = [0, 0.86],
        zeroline = False,
        showgrid = False,
        tickmode = 'array',
        tick0 = -0.5,
        dtick = 1,
        ticktext=x_labels,
        tickvals=np.arange(25),
        tickangle = 90,
        overlaying = 'x1',
    ),
    yaxis4= dict(
        range = [0, 12],
        domain = [0, 0.75],
        autorange = 'reversed', 
        zeroline = False,
        showgrid = True,
        tick0 = 0,
        dtick = 1,
        showticklabels = False,
        overlaying = 'y1',
        ticklen = 5,
    ),
    bargap = 0.05,
    showlegend = False,
)

po.iplot(go.Figure(data = data, layout = layout))

ii. Peak Shape

In [3]:
yrs, yre = 2002, 2016
ntop = 5

tops = get_tops(master, ntop, 'mw')[['mw', 'base_period']]
slc = tops.loc[(tops.base_period > yrs - 1) & (tops.base_period < yre + 1), 'mw']
dates = slc.index.date
points = pd.Series()
for date in dates:
    points = points.append(master.loc[(master.index.date == date), 'mw'])
points = pd.DataFrame(points, columns = ['mw'])
points['dummy'] = points.index.hour   
dummy = pd.date_range('1990-01-01 00:00', periods = 24, freq = 'H')
x_labels = dummy.strftime('%I%p')
x_labels = [x[0].replace('0','') + x[1:] for x in x_labels]
x_range = list(range(24))
data = []
if yrs == yre:
    title_str = str(yrs)
else:
    title_str = str(yrs) + ' - ' + str(yre)

# area plot of all data point in range
winter = points[(points.index.month > 10) | (points.index.month < 5)]
summer = points[(points.index.month > 4) & (points.index.month < 11)]

trace0 = go.Scatter(
    x = x_range,
    y = summer.groupby('dummy').min()['mw'],
    mode = 'line',
#    opacity = 0.5,
    line = dict(
        color='transparent'
    ),
    name = 'Summer Low',
    visible = True,
    showlegend = False,
    hoverlabel = dict(
        bgcolor = get_colors(2, 1),
#        font = {color: 'black'}
    )
)

trace1 = go.Scatter(
    x = x_range,
    y = summer.groupby('dummy').max()['mw'],
    fill='tonexty',
    fillcolor=get_colors(2, 0.8),
    mode='line',
    line=dict(
        color='transparent'
    ),
    name = 'Summer Range',
    visible = True,
    hoverinfo='none',
)

trace2 = go.Scatter(
    x = x_range,
    y = summer.groupby('dummy').max()['mw'],
    mode='line',
    line=dict(
        color='transparent'
    ),
    name = 'Summer High',
    visible = True,
    showlegend = False,
    hoverlabel = dict(
        bgcolor = get_colors(2, 1),
#        font = {color: 'black'}
    )
)

# mean
trace4 = go.Scatter(
    x = x_range,
    y = summer.groupby('dummy').mean()['mw'],
#    fill='tonexty',
    mode = 'line',
    line = dict(
        width = 1.5,
        color = get_colors(-1, 1),
        dash = 'dot'
    ),
    name = 'Summer Mean',
    visible = True,)

data.extend([trace0, trace1, trace2, trace4])

trace0 = go.Scatter(
    x = x_range,
    y = winter.groupby('dummy').min()['mw'],
    mode = 'line',
#    opacity = 0.5,
    line = dict(
        color='transparent'
    ),
    name = 'Winter Low',
    visible = True,
    showlegend = False,
)

trace1 = go.Scatter(
    x = x_range,
    y = winter.groupby('dummy').max()['mw'],
   fill='tonexty',
   fillcolor=get_colors(-1, 0.4),

    mode='line',
    line=dict(
        color='transparent'
    ),
    name = 'Winter Range',
    visible = True,
    hoverinfo='none',
)

trace2 = go.Scatter(
    x = x_range,
    y = winter.groupby('dummy').max()['mw'],
    mode='line',
    line=dict(
        color='transparent'
    ),
    name = 'Winter High',
    visible = True,
    showlegend = False,
)

# mean
trace4 = go.Scatter(
    x = x_range,
    y = winter.groupby('dummy').mean()['mw'],
#    fill='tonexty',
    mode = 'line',
    line = dict(
        width = 1.5,
        color = get_colors(1, 1),
        dash = 'dot'),
    name = 'Winter Mean',
    visible = True,)

data.extend([trace0, trace1, trace2, trace4])

for date in dates:
    slc = points.loc[points.index.date == date]
#    slc = slc.sort_values(by = 'dummy')
    trace = go.Scatter(
        x = slc.dummy,
        y = slc.mw,
        mode = 'line',
        line = dict(
            width = 1.5,
            color = 'red',
        ),
        name = date.strftime('%Y-%m-%d'),
        visible = False,)
    data = data + [trace]

for i in range(9):
    data[i]['visible'] = True

steps = []
labels = [date.strftime('%Y-%m-%d') for date in dates]
for i in range(int(len(points)/ 24)):
    step = dict(
        method = 'restyle',
        args = ['visible', [True] * 8 + [False] * int(len(points)/24)],
        label = labels[i]
    )
    step['args'][1][i + 8] = True
    steps.append(step)


sliders = [dict(
    active = 0,
    currentvalue = {"prefix": "Peak Date: "},
    pad = {"t": 60},
    steps = steps,
)]

layout = go.Layout(
    autosize=False,
    width=900,
    height=600,
#     autosize = True,
#    margin=dict(t=50, b=0, l=50, r=0),
    title = 'Top ' + str(ntop) + ' Peak Range and Individual Peaks<br>(' + title_str + ')',
#    updatemenus = updatemenus,
    sliders = sliders,
#    annotations = annotations,
#    showlegend = True,
    xaxis = dict(
#        title = 'Hours',
        ticklen = 7,
        showticklabels=True,
        ticktext=x_labels,
        tickvals=x_range,
        tickangle = 90,
        linecolor = 'grey',
        mirror = True,
    ),
    yaxis = dict(
#        zeroline = False,
#        showgrid = False,
        title = 'Demand [MW]',
        range = [12000, 28000],
        linecolor = 'grey',
        mirror = True,
    ),
    legend=dict(
        x=0,
        y=1,
        traceorder='normal',
        bgcolor='transparent',
#        bordercolor='grey',
#        borderwidth=1
    )
)

po.iplot(go.Figure(data = data, layout = layout))

IESO Forecast

More information about IESO's planning and forecasting can be find here. The Near Real-Time (days ahead and current day forecasts) takes into account:

  • the day of the week;
  • detailed weather forecasts;
  • historical demand;
  • embedded (or distribution-connected) generation;
  • system conditions; and
  • special events or holidays.

i. Forecast Mode and Demand Delta

IESO provides three forecast mode: Average, Peak and Forecast (combination of the average and peak algorithm). This is a quick hist-and-rug plot showing the distrbution of delta between the forecasted and actual demand, on hourly basis.

In [4]:
delta1 = master.peak - master.mw
delta2 = master.average - master.mw
delta3 = master.forecast - master.mw

delta1.dropna(inplace = True)
delta2.dropna(inplace = True)
delta3.dropna(inplace = True)

fig = ff.create_distplot(
    [delta1, delta2, delta3],
    ['Mode: Peak', 'Mode: Average', 'Mode: Forecast'],
    bin_size = [50, 50, 50],
    curve_type = 'normal',
    colors = [get_colors(-1, 1), get_colors(2, 1), get_colors(1, 1)],
)

trace1 = go.Box(
    x = delta1,
    xaxis = 'x1',
    yaxis = 'y2',
    marker = dict(
        color = get_colors(-1, 0.8)
    ),
    showlegend = False,
    name = 'Mode: Peak',    
)

trace2 = go.Box(
    x = delta2,
    xaxis = 'x1',
    yaxis = 'y2',
    marker = dict(
        color = get_colors(2, 1)
    ),
    showlegend = False,
    name = 'Mode: Average',
)

trace3 = go.Box(
    x = delta3,
    xaxis = 'x1',
    yaxis = 'y2',
    marker = dict(
        color = get_colors(1, 1)
    ),
    showlegend = False,
    name = 'Mode: Forecast',
)

data = [trace1, trace2, trace3]

fig['data'] = fig['data'][:6]
fig['data'].extend(data)

fig['layout'].update(
    title = 'Distribution of Delta Between Forecasted and Actual Demand<br>(2017)',
    autosize=False,
    width=900,
    height=580,
    margin = dict(b = 40),
    xaxis = dict(
        range = [-1500, 1500],
        showgrid = False,
        title = 'Delta [MW]',
        position = 0,
    ),
    yaxis = dict(
        domain = [0.22, 1],
        autorange = True,
        showticklabels = False,
        showgrid = False,
        linecolor = 'grey',
        mirror = True,
        zeroline = True,
    ),
    xaxis2 = dict(
        range = [-1500, 1500],
        overlaying = 'x1',
        anchor = 'y1',
        linecolor = 'grey',
        mirror = True,
        showgrid = True,
        zeroline = False,
        showticklabels = False,        
    ),
    yaxis2 = dict(
        domain = [0, 0.2],
        showticklabels = False,
        showgrid = False,
    ),
    legend=dict(
        x=0,
        y=1,
        traceorder='normal',
        bgcolor='transparent',
#        bordercolor='grey',
#        borderwidth=1
    ),
)

po.iplot(fig)

ii. Time Delta

This plot shows the delta between IESO forecasted daily peak hour and actual daily peak hour. For most of times, the forecasted peak hour is right at or 1 hour behind the actual peak hour (i.e. forecast peak at 5pm, actual peak happens at 4pm).

In [5]:
slc = master[master.base_period == 2017]

actual = slc.loc[slc.groupby(pd.TimeGrouper('D'))['mw'].idxmax()][['mw']]
actual['hour0'] = actual.index.hour
actual.index = actual.index.date
peak = slc.loc[slc.groupby(pd.TimeGrouper('D'))['peak'].idxmax()][['peak']]
peak['hour1'] = peak.index.hour
peak.index = peak.index.date
average = slc.loc[slc.groupby(pd.TimeGrouper('D'))['average'].idxmax()][['average']]
average['hour2'] = average.index.hour
average.index = average.index.date
forecast = slc.loc[slc.groupby(pd.TimeGrouper('D'))['forecast'].idxmax()][['forecast']]
forecast['hour3'] = forecast.index.hour
forecast.index = forecast.index.date
top_dates = actual.sort_values(by = 'mw', ascending = False)[['mw']]
top_dates.dropna(inplace = True)
dates = top_dates
temp = pd.concat([dates, actual, peak, average, forecast], axis = 1, join = 'inner')
delta1 = temp.hour1 - temp.hour0
delta2 = temp.hour2 - temp.hour0
delta3 = temp.hour3 - temp.hour0

fig = ff.create_distplot(
    [delta1, delta2, delta3],
    ['Mode: Peak', 'Mode: Average', 'Mode: Forecast'],
    bin_size = [1, 1, 1],
    curve_type = 'normal',
    colors = [get_colors(-1, 1), get_colors(2, 1), get_colors(1, 1)],
)

trace1 = go.Box(
    x = delta1,
    xaxis = 'x1',
    yaxis = 'y2',
    marker = dict(
        color = get_colors(-1, 0.8)
    ),
    showlegend = False,
    name = 'Mode: Peak',    
)

trace2 = go.Box(
    x = delta2,
    xaxis = 'x1',
    yaxis = 'y2',
    marker = dict(
        color = get_colors(2, 1)
    ),
    showlegend = False,
    name = 'Mode: Average',
)

trace3 = go.Box(
    x = delta3,
    xaxis = 'x1',
    yaxis = 'y2',
    marker = dict(
        color = get_colors(1, 1)
    ),
    showlegend = False,
    name = 'Mode: Forecast',
)

data = [trace1, trace2, trace3]

fig['data'] = fig['data'][:6]
fig['data'].extend(data)

fig['layout'].update(
    title = 'Distribution of Delta Between Forecasted Peak Hour and Actual Peak Hour<br>(2017)',
    autosize=False,
    width=900,
    height=580,
    margin = dict(b = 40),
    xaxis = dict(
        range = [-5, 6],
#        tick0 = -5,
        dtick = 1,
        showgrid = False,
        title = 'Delta [MW]',
        position = 0,
        ticktext = ['%+d'% x + 'Hr' for x in np.arange(-5, 6)],
        tickvals = np.arange(-5, 6) + 0.5
    ),
    yaxis = dict(
        domain = [0.22, 1],
        autorange = True,
        showticklabels = False,
        showgrid = False,
        linecolor = 'grey',
        mirror = True,
        zeroline = True,
    ),
    xaxis2 = dict(
        range = [-5, 6],
        dtick = 1,
        overlaying = 'x1',
        anchor = 'y1',
        linecolor = 'grey',
        mirror = True,
        showgrid = True,
        zeroline = False,
        showticklabels = False,        
    ),
    yaxis2 = dict(
        domain = [0, 0.2],
        showticklabels = False,
        showgrid = False,
    ),
    legend=dict(
        x=0,
        y=1,
        traceorder='normal',
        bgcolor='transparent',
#        bordercolor='grey',
#        borderwidth=1
    ),
)

po.iplot(fig)

Weather Correlation

The Climate Canada weather data includes hourly record for the following:

  • temperature (°C)
  • dew point temperature (°C)
  • relative humidity (%) - this was converted to absolute humidity in kg/m3 using temperature and station pressure
  • wind Direction (deg) - not included in correlation
  • wind speed (kW/h)
  • visibility (km),
  • station pressure (kPa),
  • humidex
  • wind chill

Not all records are available for all stations and durations. I grouped up the stations by proximity to major urban population areas and for each area, average reading (excluding missing record) from closest five stations are used. The area readings are then aggregated together to get the weighted average (based on population) for the whole province.

i. Overview

A quick plot of Ontario Demand versus differen weather attributes by season. This is to give an intuition of possible bivariate replationships (the better the data group together and form a "straight line", the better the correlation). - Note: only 2016 data are plotted here for faster rendering. The correlation calculations in next section are based on 2012 - 2017 data.

In [6]:
slc = master[master.base_period == 2016]
slc = slc.iloc[:, :-4]
slc.columns = ['Ontario Demand', 'Temperature', 'Humidity', 'Humidex', 'Wind Chill', 'Visibility', 'Wind Speed', 'Dew Point Temperature', 'Season']

data = []

for col in slc.iloc[:, 1:-1].columns:
    trace1 = go.Scatter(
        x = slc.loc[slc.Season == 'Summer', col],
        y = slc.loc[slc.Season == 'Summer', 'Ontario Demand'],
        mode = 'markers',
        marker = dict(
            size = 10,
            color = get_colors(3, 0.3),
            line = dict(
                width = 1,
                color = get_colors(3, 0.8)
            ),
        ),
        name = 'Summer',
        hoverinfo='none',
        visible = False,
    )

    trace2 = go.Scatter(
        x = slc.loc[slc.Season == 'Winter', col],
        y = slc.loc[slc.Season == 'Winter', 'Ontario Demand'],
        mode = 'markers',
        marker = dict(
            size = 10,
            color = get_colors(-1, 0.1),
            line = dict(
                width = 1,
                color = get_colors(-1, 0.3)
            ),
        ),
        name = 'Winter',
        hoverinfo='none',
        visible = False,
    )
        
    data += [trace1, trace2]


data[0]['visible'] = True
data[1]['visible'] = True

button_list = []
label_list = list(slc.iloc[:, 1:-1].columns)
title_list = ['Temperature [°C]', 'Humidity [kg/m3]', 'Humidex', 'Wind Chill', 
              'Visibility [km]', 'Wind Speed [km/h]', 'Dew Point Temperature [°C]']

for i in range(len(slc.iloc[:, 1:-1].columns)):    
    button = dict(
        label = label_list[i],
        method = 'update',
        args = [
            {'visible': [False] * (i * 2) + [True] * 2 + [False] * (len(slc.iloc[:, 1:-1].columns) - i - 1)*2},
            {'xaxis': {
                'title': title_list[i],
                'zeroline': False,
                'linecolor': 'grey',
                'mirror': True}
            },
        ]
    )
    button_list += [button]

updatemenus = list([
    dict(type="buttons",
        active=0,
        buttons=button_list,
        direction = 'left',
        x = 0,
        xanchor = 'left',
        y = 1.1,
        yanchor = 'top',
        pad = {'r': 0, 't': 0},
        bgcolor = 'rgba(128, 128, 128, 0.2)',
#        active = 99,
        bordercolor = get_colors(-1, 0.8),
        ),
])

layout = go.Layout(
    autosize=False,
    width=900,
    height=600,
#    hovermode = None,
    title='Ontario Demand vs. Climate Attribute (2016)',
    updatemenus = updatemenus,
    xaxis = dict(
#        range = [5, 35],
        linecolor = 'grey',
        mirror = True,
        title='Temp [°C]',
        zeroline = False,
    ),
    yaxis = dict(
#        range = [10000, 22000],
        linecolor = 'grey',
        mirror = True,
#        side = 'right',
        title = 'Demand [MW]',
    ),
)

fig = go.Figure(data=data, layout=layout)

po.iplot(fig)

ii. Pearson Correlation

This table shows the correlation (squared) between Ontario Demand and different weather attraibutes.

By season:

In [7]:
slc = master['2012':]
slc = slc.iloc[:, :-4]
slc.columns = ['Ontario Demand', 'Temperature', 'Humidity', 'Humidex', 'Wind Chill', 'Visibility', 'Wind Speed', 'Dew Point', 'Season']
corr = slc.groupby('Season').corr() ** 2
corr = corr[list(corr.index.levels[1] == 'Ontario Demand') * 2]
corr.reset_index(level = 1, inplace = True, drop = True)
corr.drop('Ontario Demand', axis = 1, inplace = True); corr
Out[7]:
Dew Point Humidex Humidity Temperature Visibility Wind Chill Wind Speed
Season
Summer 0.236395 0.443446 0.252792 0.628004 0.000005 0.000696 0.090580
Winter 0.172214 NaN 0.153867 0.151738 0.003756 0.156979 0.013178

By season, day of week and time of day:

In [8]:
slc['Day of Week'] = ['Weekday' if d < 5 else 'Weekend' for d in slc.index.dayofweek]
slc['Time of Day'] = ['Evening' if (h > 18) | (h < 7) else 'Daytime' for h in slc.index.hour]
corr = slc.groupby(['Season', 'Day of Week', 'Time of Day']).corr() **2; corr
corr.reset_index(level = 3, inplace = True)
corr = corr[corr.level_3 == 'Ontario Demand']
corr.drop(['level_3', 'Ontario Demand'], axis = 1, inplace = True); corr
Out[8]:
Dew Point Humidex Humidity Temperature Visibility Wind Chill Wind Speed
Season Day of Week Time of Day
Summer Weekday Daytime 0.485060 0.606150 0.512263 0.743307 0.041484 0.002192 0.008681
Evening 0.195078 0.325943 0.213923 0.523041 0.000059 0.000839 0.032127
Weekend Daytime 0.405515 0.571183 0.424959 0.691664 0.022312 0.002478 0.024426
Evening 0.219273 0.394470 0.233178 0.570166 0.000695 0.000718 0.040012
Winter Weekday Daytime 0.265479 NaN 0.225032 0.331034 0.005812 0.305655 0.005053
Evening 0.190187 NaN 0.171218 0.168037 0.023654 0.169895 0.004475
Weekend Daytime 0.233835 NaN 0.220930 0.278659 0.020896 0.253594 0.000206
Evening 0.228179 NaN 0.216150 0.198884 0.009964 0.204668 0.010211

Master Data

Time Series

Tips: Click on legend to show/hide series. Drag on main plot area to zoom in and double click to zoom back. Drag active window on subplot (at bottom) to "pan".
- Note the years represent the corresponding ICI Base Period, which is from May 1 to Apr 30. For example, 2012 would mean base period May 1, 2012 to Apr 30, 2013.

In [9]:
yrs, yre = 2002, 2017
ntop = 5

# subtitle
# if yrs == yre:
#     title_str = 'Base Period: ' + str(yrs)
# else:
#     title_str = 'Base Period: ' + str(yrs) + ' - ' + str(yre)
# if yrs >= 2012:
#     y1lim = 26000
#     y2lim = 40
# else:
#     y1lim = 28000
#     y2lim = 45

# lines
slc = master[(master.base_period >= yrs) & (master.base_period <= yre)]
trace0 = go.Scatter(
    x = slc.index,
    y = slc.average,
    mode = 'line',
    name = 'Forecasted Average',
    opacity = 0.5,
    line = dict(
        color = 'transparent',
    ),
    showlegend = False,
)

trace1 = go.Scatter(
    x = slc.index,
    y = slc.peak,
    mode = 'line',
    name = 'Forecasted Range',
    opacity = 0.5,
    fill='tonexty',
    fillcolor='rgba(128, 128, 128, 0.6)',
    line = dict(
        width = 1,
        color = 'rgba(128, 128, 128, 0.6)',
    ),
    hoverinfo='none',
)

trace2 = go.Scatter(
    x = slc.index,
    y = slc.peak,
    mode = 'line',
    name = 'Forecasted Peak',
    opacity = 0.5,
    line = dict(
        color = 'transparent',
    ),
    showlegend = False,
)

trace3 = go.Scatter(
    x = slc.index,
    y = slc.mw,
    mode = 'line',
    name = 'Ontario Demand',
#    opacity = 0.5,
    line = dict(
        width = 1,
        color = get_colors(-1, 0.9),
    )
)

temp = slc[slc.season == 'Summer']
trace4 = go.Scatter(
    x = temp.index,
    y = temp.temp,
    mode = 'line',
    name = 'Temperature',
#    opacity = 0.5,
    line = dict(
        width = 1.5,
        color = get_colors(3, 0.9),
#        dash = 'dot'
    ),
    yaxis = 'y2'
)

# markers
tops = get_tops(slc, ntop, 'forecast')
tops = tops['20170501':]
trace5 = go.Scatter(
    x = tops.index,
    y = tops.forecast,
    mode = 'markers',
    marker = dict(
        size = 6,
        color = get_colors(-1, 1),
        symbol = 'cross'),
    name = 'Top ' + str(ntop) + ' Forecasted')

tops = get_tops(slc, ntop, 'mw')
trace6 = go.Scatter(
    x = tops.index,
    y = tops.mw,
    mode = 'markers',
    marker = dict(
        size = 6,
        color = 'red',
        symbol = 'cross'),
    name = 'Top ' + str(ntop) + ' Actual')

tops = get_tops(slc, ntop, 'temp')
tops = tops['20120501':]
trace7 = go.Scatter(
    x = tops.index,
    y = tops.temp,
    mode = 'markers',
    marker = dict(
        size = 6,
        color = get_colors(4, 1),
        symbol = 'cross'),
    name = 'Top ' + str(ntop) + ' Temp.',
    yaxis = 'y2'
)

# tops = get_mins(slc, ntop, 'temp')
# tops = tops[:'20170430']
# trace8 = go.Scatter(
#     x = tops.index,
#     y = tops.temp,
#     mode = 'markers',
#     marker = dict(
#         size = 6,
#         color = get_colors(4, 1),
#         symbol = 'cross'),
#     line = dict(
#         width = 1),
#     name = 'Min ' + str(ntop) + ' Temp.',
#     yaxis = 'y2'
# )

data = [trace0, trace1, trace2, trace3, trace4, trace5, trace6, trace7]
# data.append(trace8)

button_list1 = [dict(
    label = 'Reset',
    method = 'relayout',
    args = ['xaxis.range', [slc.index[0], slc.index[-1]]]
)]

for i in range(2002, 2017, 5):
    if i == 2012:
        inc = 5
        label = str(i) + ' to Date'
    else:
        inc = 4
        label = str(i) + ' to ' + str(i + inc)
    temp = slc[(slc.base_period >= i) & (slc.base_period <= i + inc)]
    button = dict(
        label = label,
        method = 'relayout',
        args = [{
            'xaxis.range': [temp.index[0], temp.index[-1]],
#            'xaxis.rangeslider.range': [temp.index[0], temp.index[-1]],
        }]
    )
    button_list1.append(button)  
    
button_list2 = [dict(
    label = 'View Year',
    method = 'relayout',
    args = []
)]
    
for i in range(2012, 2018):
    temp = slc[slc.base_period == i]
    button = dict(
        label = str(i),
        method = 'relayout',
        args = [{
            'xaxis.range': [temp.index[0], temp.index[-1]],
#            'xaxis.rangeslider.range': [temp.index[0], temp.index[-1]],
        }]
    )
    button_list2.append(button)
        
updatemenus = list([
    dict(
        type="buttons",
        active=-1,
        buttons=button_list1,
        direction = 'left',
        x = 0,
        xanchor = 'left',
        y = 1.12,
        yanchor = 'top',
        pad = {'r': 0, 't': 0},
#        bgcolor = 'rgba(128, 128, 128, 0.2)',
#        active = 99,
#        bordercolor = get_colors(-1, 0.8),
        ),
    dict(
#        type="buttons",
        active=0,
        buttons=button_list2,
        direction = 'down',
        x = 0.61,
        xanchor = 'left',
        y = 1.12,
        yanchor = 'top',
        pad = {'r': 0, 't': 0},
#        bgcolor = 'rgba(128, 128, 128, 0.2)',
#        active = 99,
#        bordercolor = get_colors(-1, 0.8),
        ),
])
    
layout = dict(
    autosize=False,
    width=900,
    height=600,
    hovermode = 'closest',
    updatemenus = updatemenus,
#    annotations = annotations,
    title='Time Series Data',
    xaxis=dict(
        range = [slc.index[0], slc.index[-1]],
        rangeslider=dict(
            range = [slc.index[0], slc.index[-1]],
        ),
        type='date',
        linecolor = 'grey',
        mirror = True,
    ),
    yaxis = dict(
        title = 'Demand [MW]',
        range = [10000, 28000],
        linecolor = 'grey',
        mirror = True,
        dtick = 2000,
    ),
    yaxis2=dict(
        title='Temp [°C]',
        overlaying='y',
        side='right',
 #       autorange = 'reversed',
        zeroline = False,
        range = [0, 45],
        dtick = 5,
    ),
    legend=dict(
        x=1.05,
        y=1,
        traceorder='normal',
        bgcolor='transparent',
#        bordercolor='grey',
#        borderwidth=1
    )
)

po.iplot(go.Figure(data = data, layout = layout))

Pivot Table

Drag-and-Drop. Rank can be used to filter the top N (up to 50) peaks of each base period.
- It is important to remember that Coincident Peaks need to be on unique days over a base period (i.e. only one highest demand of the day counts!).

In [10]:
data_table = master.copy()
data_table.columns = [
    'Ontario Demand', 'Temperature', 'Absolute Humidity', 'Humedix', 'Wind Chill', 'Visibility', 
    'Windspeed', 'Dew Point Temperature', 'Season', 'Forecast (Avg)', 'Forecast (Peak)', 'Forecast', 'Base Period']
data_table['Month'] = data_table.index.strftime('%b')
data_table['Day of Week'] = data_table.index.strftime('%a')
data_table['Hour'] = data_table.index.hour
data_table['Date'] = data_table.index.date
ranks = master.loc[master.groupby(pd.TimeGrouper('D'))['mw'].idxmax(), ['mw', 'base_period']]
ranks = ranks[ranks.index.notnull()]
ranks = ranks.groupby('base_period').rank(method = 'first', ascending = False)
data_table['Rank'] = ranks
data_table['Rank'] = pd.cut(
    data_table['Rank'],
    bins = list(range(0, 55, 5)),
    labels = ['Top 5'] + ['Top ' + str(i + 1) + ' - ' + str(i + 5) for i in range(5, 50, 5)])
data_table = data_table[['Ontario Demand', 'Rank', 'Date', 'Hour', 'Base Period', 'Season', 'Month', 'Day of Week']]
data_table.reset_index(inplace = True, drop = True)

pivot_ui(data_table)
Out[10]: